Data preparation

library(archivist)
library(knitr)
library(ggplot2)

# Archive every printed ggplot / kable object in the local "arepo" repository.
# The user / repo / subdir values are the ones that appear in the printed
# "Load: archivist::aread(...)" hooks.
addHooksToPrint(
  class = c("ggplot", "knitr_kable"),
  repoDir = "arepo",
  repo = "Eseje",
  user = "pbiecek",
  subdir = "arepo"
)

library(scales)
library(dplyr)
library(tidyr)
library(gridExtra)
library(rworldmap)
library(ggthemes)
library(latticeExtra)
library(lattice)

# Summary function for stat_summary(fun.data = ...): the quartiles of `x`,
# named the way a crossbar geom expects.
#
# x      numeric vector
# na.rm  drop missing values before computing the quantiles (default TRUE);
#        with na.rm = FALSE quantile() stops with an error on any NA
#
# Returns a named numeric vector c(ymin = Q1, y = median, ymax = Q3).
q3 <- function(x, na.rm = TRUE) {
  quartiles <- quantile(x, probs = c(0.25, 0.5, 0.75), na.rm = na.rm)
  setNames(quartiles, c("ymin", "y", "ymax"))
}

# Global knitr chunk options: no output prefix, 6x6 figures, raw ("asis")
# results, warnings and messages hidden, caching off.
opts_chunk$set(
  comment = NA,
  fig.width = 6,
  fig.height = 6,
  results = "asis",
  warning = FALSE,
  message = FALSE,
  cache = FALSE
)

Data

This document uses two datasets from the SmarterPoland package: `countries` and `maturaExam`.

library(SmarterPoland)
# Render the first rows of `countries` as a table.
countries %>% head() %>% kable() %>% print()

Load: archivist::aread('pbiecek/Eseje/arepo/a0f2357b814a76a46f2a42e831ea5296')

country birth.rate death.rate population continent
Afghanistan 34.1 7.7 30552 Asia
Albania 12.9 9.4 3173 Europe
Algeria 24.3 5.7 39208 Africa
Andorra 8.9 8.4 79 Europe
Angola 44.1 13.9 21472 Africa
Antigua and Barbuda 16.5 6.8 90 Americas
# Render the first rows of `maturaExam` as a table.
maturaExam %>% head() %>% kable() %>% print()

Load: archivist::aread('pbiecek/Eseje/arepo/9797e887eb6f35519246f51946388462')

id_ucznia punkty przedmiot rok
4 14 matematyka 2011
4 31 j. polski 2011
5 19 matematyka 2010
5 35 j. polski 2010
7 16 matematyka 2010
7 43 j. polski 2010

Layers

# Countries with the highest / lowest birth rate within each continent; they
# are drawn as text labels in the plots of this section.
countriesMax <- filter(
  group_by(countries, continent),
  birth.rate == max(birth.rate, na.rm = TRUE)
)
countriesMin <- filter(
  group_by(countries, continent),
  birth.rate == min(birth.rate, na.rm = TRUE)
)

# theme_bw() variant with white vertical grid lines and zero-size axis ticks
# and axis text.
theme_ggplain <- theme_bw() +
  theme(
    axis.text = element_text(size = 0),
    axis.ticks = element_line(size = 0),
    panel.grid.major.x = element_line(color = "white")
  )

# First example: all layers combined.
# Continents are ordered by their median birth rate.
countries$continent <- reorder(
  countries$continent, countries$birth.rate, median,
  na.rm = TRUE
)

ggplot(countries, aes(x = continent, y = birth.rate, label = country)) +
  geom_violin(
    aes(fill = continent),
    scale = "width", color = "white", alpha = 0.4
  ) +
  stat_summary(
    fun.data = "q3", geom = "crossbar",
    colour = "red", width = 0.5
  ) +
  geom_jitter(
    aes(size = (population)^0.9),
    position = position_jitter(width = .45, height = 0),
    shape = 15
  ) +
  geom_rug(sides = "l") +
  geom_text(data = countriesMin, vjust = 2, color = "blue3") +
  geom_text(data = countriesMax, vjust = -1, color = "blue3") +
  theme_bw() +
  theme(
    legend.position = "none",
    panel.grid.major.x = element_line(color = "white")
  ) +
  xlab("")

Load: archivist::aread('pbiecek/Eseje/arepo/e10f9d223df408fca73ea548456493f4')

# Text-label layer only. The jitter layer is drawn white with size 0
# (presumably kept so the axes match the full plot).
ggplot(countries, aes(x = continent, y = birth.rate, label = country)) +
  geom_jitter(color = "white", size = 0) +
  geom_text(data = countriesMin, vjust = 2, color = "blue3") +
  geom_text(data = countriesMax, vjust = -1, color = "blue3") +
  theme_bw() +
  labs(x = "", y = "") +
  theme_ggplain

Load: archivist::aread('pbiecek/Eseje/arepo/cd104c2c8e808cf9bc698bf9068a139d')

# Violin layer only; the text labels are drawn in white.
ggplot(countries, aes(x = continent, y = birth.rate, label = country)) +
  geom_violin(
    aes(fill = continent),
    scale = "width", color = "white", alpha = 0.4
  ) +
  geom_text(data = countriesMin, vjust = 2, color = "white") +
  geom_text(data = countriesMax, vjust = -1, color = "white") +
  theme_ggplain +
  labs(x = "", y = "") +
  theme(legend.position = "none")

Load: archivist::aread('pbiecek/Eseje/arepo/c305ba0666702d099fcf3e6e3d0164ba')

# Quartile crossbar layer only; the text labels are drawn in white.
ggplot(countries, aes(x = continent, y = birth.rate, label = country)) +
  stat_summary(
    fun.data = "q3", geom = "crossbar",
    colour = "red", width = 0.5
  ) +
  geom_text(data = countriesMin, vjust = 2, color = "white") +
  geom_text(data = countriesMax, vjust = -1, color = "white") +
  theme_ggplain +
  labs(x = "", y = "")

Load: archivist::aread('pbiecek/Eseje/arepo/221767305a306e0ce1a266f958f9b394')

# Jittered square points only, sized by population^0.9; the text labels are
# drawn in white.
ggplot(countries, aes(x = continent, y = birth.rate, label = country)) +
  geom_jitter(
    aes(size = (population)^0.9),
    position = position_jitter(width = .45, height = 0),
    shape = 15
  ) +
  geom_text(data = countriesMin, vjust = 2, color = "white") +
  geom_text(data = countriesMax, vjust = -1, color = "white") +
  theme_ggplain +
  labs(x = "", y = "") +
  theme(legend.position = "none")

Load: archivist::aread('pbiecek/Eseje/arepo/b019d3802841e4e383d0e442c3e96eae')

# Left-hand rug layer only; the text labels are drawn in white.
ggplot(countries, aes(x = continent, y = birth.rate, label = country)) +
  geom_rug(sides = "l") +
  geom_text(data = countriesMin, vjust = 2, color = "white") +
  geom_text(data = countriesMax, vjust = -1, color = "white") +
  theme_ggplain +
  labs(x = "", y = "")

Load: archivist::aread('pbiecek/Eseje/arepo/03ccb0454a57b2dece0b1275ab582469')

# Second example: scatterplot with 2D density contours, the y = x line and
# one highlighted point.
# NOTE(review): row 132 is picked by position — confirm which country it is.
ggplot(countries, aes(x = birth.rate, y = death.rate)) +
  geom_density_2d(h = c(10, 10), color = "grey") +
  geom_point() +
  geom_abline(intercept = 0, slope = 1) +
  geom_point(data = countries[132, ], color = "red", size = 4) +
  theme_bw() +
  xlim(0, 50) +
  ylim(-10, 20) +
  coord_fixed(xlim = c(0, 50), ylim = c(0, 18), expand = FALSE)

Load: archivist::aread('pbiecek/Eseje/arepo/e26b16a4ca4e76a6e59d657554d95964')

# Points layer only, on the same fixed-ratio window as the full plot.
# A plot holds a single coordinate system: an earlier bare coord_fixed() would
# be replaced by the one below, so only the call with explicit limits is kept.
ggplot(countries, aes(x = birth.rate, y = death.rate)) +
  geom_point() +
  theme_ggplain +
  xlab("") + ylab("") +
  xlim(0, 50) + ylim(-10, 20) +
  coord_fixed(xlim = c(0, 50), ylim = c(0, 18), expand = FALSE)

Load: archivist::aread('pbiecek/Eseje/arepo/731702bb479129834b34a3352912ca7f')

# Density-contour layer only (points are drawn in white).
# A plot holds a single coordinate system: an earlier bare coord_fixed() would
# be replaced by the one below, so only the call with explicit limits is kept.
ggplot(countries, aes(x = birth.rate, y = death.rate)) +
  geom_point(color = "white") +
  geom_density_2d(h = c(10, 10), color = "grey") +
  theme_ggplain +
  xlab("") + ylab("") +
  xlim(0, 50) + ylim(-10, 20) +
  coord_fixed(xlim = c(0, 50), ylim = c(0, 18), expand = FALSE)

Load: archivist::aread('pbiecek/Eseje/arepo/355c572620024e50e7b46ec6b89902c1')

# Identity-line layer only (points are drawn in white).
# A plot holds a single coordinate system: an earlier bare coord_fixed() would
# be replaced by the one below, so only the call with explicit limits is kept.
ggplot(countries, aes(x = birth.rate, y = death.rate)) +
  geom_point(color = "white") +
  geom_abline(intercept = 0, slope = 1) +
  theme_ggplain +
  xlab("") + ylab("") +
  xlim(0, 50) + ylim(-10, 20) +
  coord_fixed(xlim = c(0, 50), ylim = c(0, 18), expand = FALSE)

Load: archivist::aread('pbiecek/Eseje/arepo/59c16dace2ea77ef5d2d2236c79e7a50')

# Highlighted-point layer only (all other points are drawn in white).
# NOTE(review): row 132 is picked by position — confirm which country it is.
# A plot holds a single coordinate system: an earlier bare coord_fixed() would
# be replaced by the one below, so only the call with explicit limits is kept.
ggplot(countries, aes(x = birth.rate, y = death.rate)) +
  geom_point(color = "white") +
  geom_point(data = countries[132, ], color = "red", size = 4) +
  theme_ggplain +
  xlab("") + ylab("") +
  xlim(0, 50) + ylim(-10, 20) +
  coord_fixed(xlim = c(0, 50), ylim = c(0, 18), expand = FALSE)

Load: archivist::aread('pbiecek/Eseje/arepo/cf6c007123eef41d0b940a417ec8e110')

Mappings

# Position mappings only: birth rate on x, death rate on y.
ggplot(countries, aes(x = birth.rate, y = death.rate)) +
  geom_point() +
  theme_bw() +
  coord_fixed()

Load: archivist::aread('pbiecek/Eseje/arepo/59fb8661c9028424c98e58931f7ee982')

# Continent additionally mapped to both colour and shape.
ggplot(
  countries,
  aes(
    x = birth.rate, y = death.rate,
    color = continent, shape = continent
  )
) +
  geom_point() +
  coord_fixed() +
  theme_bw() +
  theme(legend.position = "top")

Load: archivist::aread('pbiecek/Eseje/arepo/62679976f7ce90c10e5069656a5f7019')

# Bin population into ordered size classes.
# NOTE(review): the labels suggest `population` is stored in thousands —
# confirm. Values <= 1 or > 10^7 fall outside the breaks and become NA.
countries$populationCat <- cut(
  countries$population,
  c(1, 10^3, 10^4, 10^5, 10^6, 10^7),
  labels = c("< 1M", "< 10M", "< 100 M", "< 1 B", "> 1 B"),
  ordered_result = TRUE
)

# Population class mapped to point size, continent to colour and shape.
ggplot(
  countries,
  aes(
    x = birth.rate, y = death.rate,
    color = continent, shape = continent,
    size = populationCat
  )
) +
  geom_point() +
  coord_fixed() +
  theme_bw() +
  theme(legend.position = "top")

Load: archivist::aread('pbiecek/Eseje/arepo/e38e4767455c018690f53c806a931f0d')

# Population class mapped to both colour (sequential palette) and manually
# chosen point sizes; continent mapped to shape.
ggplot(
  countries,
  aes(
    x = birth.rate, y = death.rate,
    color = populationCat, shape = continent,
    size = populationCat
  )
) +
  geom_point() +
  coord_fixed() +
  scale_color_brewer(type = "seq") +
  scale_size_manual(values = c(1, 2, 3, 6, 9)) +
  theme_bw() +
  theme(legend.position = "top")

Load: archivist::aread('pbiecek/Eseje/arepo/64fc76aeae21cf01a3c8a1459a3da075')

Forms / Geoms

Points

# Dot plot: birth rates binned along y and stacked symmetrically per continent.
ggplot(countries, aes(x = continent, y = birth.rate)) +
  geom_dotplot(
    binaxis = "y",
    stackdir = "center",
    binwidth = 0.7
  ) +
  theme_bw()

Load: archivist::aread('pbiecek/Eseje/arepo/1498be8bf3017e66abf903d5839b344f')

# Scatterplot of death rate against birth rate.
ggplot(
  countries,
  aes(x = birth.rate, y = death.rate)
) +
  geom_point() +
  theme_bw()

Load: archivist::aread('pbiecek/Eseje/arepo/ae416cda9976041f6eb610bcdb5ee682')

# Jittered points: birth rate per continent with horizontal jitter width 0.2.
ggplot(
  countries,
  aes(x = continent, y = birth.rate)
) +
  geom_jitter(position = position_jitter(width = .2)) +
  theme_bw()

Load: archivist::aread('pbiecek/Eseje/arepo/4fcde4248e8873fe1e9fbecd169c988f')

# Various mappings: continent drawn as a letter-shaped point.
# Unnamed `values` are matched to the factor levels in order, and the levels of
# `continent` were re-sorted by median birth rate earlier in this file, so the
# letters are attached to continent names explicitly.
# NOTE(review): assumes five continents whose alphabetical order is
# Africa, Americas, Asia, Europe, Oceania — confirm against the data.
ggplot() +
  geom_point(
    data = countries,
    aes(x = birth.rate, y = death.rate, shape = continent),
    size = 4
  ) +
  theme_bw() +
  scale_shape_manual(
    values = setNames(
      c("F", "A", "S", "E", "O"),
      sort(unique(as.character(countries$continent)))
    )
  ) +
  theme(legend.position = c(0.9, 0.17))

Load: archivist::aread('pbiecek/Eseje/arepo/5218bc5e33667ad38e5731d12f778ebf')

# Continent drawn as a letter-shaped point and additionally mapped to colour.
# Unnamed `values` are matched to the factor levels in order, and the levels of
# `continent` were re-sorted by median birth rate earlier in this file, so the
# letters are attached to continent names explicitly.
# NOTE(review): assumes five continents whose alphabetical order is
# Africa, Americas, Asia, Europe, Oceania — confirm against the data.
ggplot() +
  geom_point(
    data = countries,
    aes(
      x = birth.rate, y = death.rate,
      shape = continent, color = continent
    ),
    size = 4
  ) +
  theme_bw() +
  scale_shape_manual(
    values = setNames(
      c("F", "A", "S", "E", "O"),
      sort(unique(as.character(countries$continent)))
    )
  ) +
  theme(legend.position = c(0.9, 0.17))

Load: archivist::aread('pbiecek/Eseje/arepo/6989132c4c4fe2c0d89d7f012ecd362f')

# Continent mapped to colour only (qualitative Brewer palette 6), with a
# fixed filled-circle shape.
ggplot() +
  geom_point(
    data = countries,
    aes(x = birth.rate, y = death.rate, color = continent),
    size = 4,
    shape = 19
  ) +
  theme_bw() +
  scale_color_brewer(type = "qual", palette = 6) +
  theme(legend.position = c(0.9, 0.17))

Load: archivist::aread('pbiecek/Eseje/arepo/84981b4356b7ab88c4796188312da625')

# Population mapped to point size on a square-root scale; legend hidden.
ggplot() +
  geom_point(
    data = countries,
    aes(x = birth.rate, y = death.rate, size = population)
  ) +
  scale_size_continuous(
    trans = "sqrt",
    labels = comma,
    limits = c(0, 1500000)
  ) +
  theme_bw() +
  theme(legend.position = "none")

Load: archivist::aread('pbiecek/Eseje/arepo/32827b02638fe2305066611c25ec6823')

Area

# Overlapping semi-transparent birth-rate densities, one per continent.
ggplot(countries, aes(x = birth.rate, fill = continent)) +
  geom_density(alpha = 0.5) +
  scale_fill_brewer(type = "qual", palette = 6) +
  theme_bw() +
  theme(legend.position = c(0.85, 0.85))

Load: archivist::aread('pbiecek/Eseje/arepo/917f5dd9328ca46b6d4cd7a4be125005')

# The same densities stacked with position = "fill", without outlines.
ggplot(countries, aes(x = birth.rate, fill = continent)) +
  geom_density(position = "fill", color = NA) +
  scale_fill_brewer(type = "qual", palette = 6) +
  theme_bw() +
  theme(legend.position = "none")

Load: archivist::aread('pbiecek/Eseje/arepo/2e40defadc4ff00b6fcdcfba6564196e')

# Violin plot of birth rate per continent; outline and fill share a palette.
ggplot(
  countries,
  aes(
    x = continent, y = birth.rate,
    color = continent, fill = continent
  )
) +
  geom_violin(scale = "width") +
  scale_color_brewer(type = "qual", palette = 6) +
  scale_fill_brewer(type = "qual", palette = 6) +
  theme_bw() +
  theme(legend.position = "none")

Load: archivist::aread('pbiecek/Eseje/arepo/708a24b35f12b42f4545fed761c84e6d')

# Ribbon: population-weighted mean birth and death rate per continent,
# drawn as two filled bands rising from zero.
ndf <- summarise(
  group_by(countries, continent),
  birth.rate = weighted.mean(birth.rate, population, na.rm = TRUE),
  death.rate = weighted.mean(death.rate, population, na.rm = TRUE)
)

ggplot() +
  geom_ribbon(
    data = ndf,
    aes(x = continent, ymax = birth.rate, y = birth.rate, ymin = 0, group = 1),
    fill = "green3"
  ) +
  geom_ribbon(
    data = ndf,
    aes(x = continent, ymax = death.rate, y = death.rate, ymin = 0, group = 1),
    fill = "red3"
  ) +
  theme_bw() +
  labs(x = "", y = "birth.rate / death.rate")

Load: archivist::aread('pbiecek/Eseje/arepo/04dda1c55a6bdc09d4cfc6c7addc8a58')

Rectangles

# Per-continent summary: population-weighted mean rates and total population.
# `population` is summed last so the weighted means still see the per-country
# values.
continents <- summarise(
  group_by(countries, continent),
  birth.rate = weighted.mean(birth.rate, w = population, na.rm = TRUE),
  death.rate = weighted.mean(death.rate, w = population, na.rm = TRUE),
  population = sum(population)
)

# Histogram of birth rates, stacked by continent, in bins of width 2.5.
ggplot(countries, aes(x = birth.rate, fill = continent)) +
  geom_histogram(binwidth = 2.5) +
  scale_fill_brewer(type = "qual", palette = 6) +
  theme_bw() +
  theme(legend.position = "none")

Load: archivist::aread('pbiecek/Eseje/arepo/cb7f3e74f607faa42577f9b944f90858')

# Scatterplot with a grey rectangle marking the birth-rate range 12.38-27.85.
ggplot(countries, aes(x = birth.rate, y = death.rate)) +
  geom_rect(
    xmin = 12.38, xmax = 27.85,
    ymin = 0, ymax = 18,
    alpha = 0.3, fill = "grey90"
  ) +
  geom_point() +
  theme_bw()

Load: archivist::aread('pbiecek/Eseje/arepo/8988de5b1d5c3c1418ee1c328bf125e9')

# Bar chart of the weighted mean birth rate per continent.
# No fill aesthetic is mapped here, so the fill scale has nothing to colour.
ggplot(
  continents,
  aes(x = continent, y = birth.rate)
) +
  geom_bar(stat = "identity") +
  scale_fill_brewer(type = "qual", palette = 6) +
  theme_bw()

Load: archivist::aread('pbiecek/Eseje/arepo/a4e997d968124a3306911c64e055705f')

# Variable-width bars: each continent spans its share of the cumulative
# population on x and reaches its birth rate on y.
continents2 <- mutate(continents, cum = cumsum(population) - population)

ggplot() +
  geom_rect(
    data = continents2,
    aes(
      xmin = cum, ymax = 0,
      xmax = cum + population, ymin = birth.rate,
      fill = continent
    )
  ) +
  geom_text(
    data = continents2,
    aes(label = continent, x = cum + population / 2, y = birth.rate),
    vjust = -0.1
  ) +
  scale_fill_brewer(type = "qual", palette = 6) +
  theme_bw() +
  theme(legend.position = "none") +
  labs(x = "population", y = "birth.rate")

Load: archivist::aread('pbiecek/Eseje/arepo/70dd28f0aed966219f5e830a5bf4bcd3')

# Per continent: a green bar from 0 to the birth rate and, beside it, a red
# bar hanging from the birth rate down by the death rate. Continents are
# placed at their numeric factor codes, so the x axis text and ticks are
# drawn in white.
ggplot() +
  geom_rect(
    data = continents,
    aes(
      xmin = as.numeric(factor(continent)) - 0.1,
      ymin = 0,
      xmax = as.numeric(factor(continent)) + 0.2,
      ymax = birth.rate
    ),
    fill = "green3"
  ) +
  geom_rect(
    data = continents,
    aes(
      xmin = as.numeric(factor(continent)) + 0.21,
      ymin = birth.rate - death.rate,
      xmax = as.numeric(factor(continent)) + 0.51,
      ymax = birth.rate
    ),
    fill = "red3"
  ) +
  geom_text(
    data = continents,
    aes(
      x = as.numeric(factor(continent)) + 0.21,
      y = birth.rate + 1,
      label = continent
    )
  ) +
  geom_hline(yintercept = 0) +
  labs(x = "", y = "birth.rate - death.rate") +
  geom_rug(
    data = continents,
    aes(
      x = as.numeric(factor(continent)) + 0.21,
      y = birth.rate - death.rate
    ),
    sides = "l"
  ) +
  theme_bw() +
  theme(
    axis.text.x = element_text(color = "white"),
    axis.ticks.x = element_line(color = "white")
  )

Load: archivist::aread('pbiecek/Eseje/arepo/a40ed22938896ecfc260dc7582e91933')

# Number of countries per continent (geom_bar counts rows).
# The y label is Polish for "number of countries"; its mis-encoded "ó" has
# been repaired.
ggplot(countries, aes(x = continent, fill = continent)) +
  geom_bar() +
  scale_fill_brewer(type = "qual", palette = 6) +
  theme_bw() +
  theme(legend.position = "none") +
  xlab("") +
  ylab("Liczba krajów")

Load: archivist::aread('pbiecek/Eseje/arepo/f75aafb1ee8fc5348dfbb030c2d84dc3')

Lines

# Slope chart: unweighted mean birth and death rate per continent, joined by
# a line between the two rate types.
countries3 <- countries %>%
  gather(rate, values, birth.rate, death.rate) %>%
  group_by(continent, rate) %>%
  summarise(values = mean(values, na.rm = TRUE))

ggplot(
  countries3,
  aes(x = rate, y = values, group = continent, color = continent)
) +
  geom_line(size = 2) +
  geom_point(size = 4) +
  theme_bw() +
  theme(legend.position = "none") +
  scale_color_brewer(type = "qual", palette = 6)

Load: archivist::aread('pbiecek/Eseje/arepo/ae65a8817b0d2150536f569d9f51185a')

# Line connecting the observations in order of birth rate.
ggplot(
  countries,
  aes(x = birth.rate, y = death.rate)
) +
  geom_line() +
  theme_bw()

Load: archivist::aread('pbiecek/Eseje/arepo/303d8e8c9a2f59a0e75970e3ae54a58e')

# Smoothed trend only: the points are fully transparent, so just the black
# smoother (without its confidence band) is visible.
ggplot(countries, aes(x = birth.rate, y = death.rate)) +
  geom_point(color = "white", alpha = 0) +
  geom_smooth(
    se = FALSE,
    size = 3,
    color = "black"
  ) +
  theme_bw()

Load: archivist::aread('pbiecek/Eseje/arepo/b4a9411aa21badaed27dbdf3c32b04b0')

# Three trend lines over the points: a linear fit, a quadratic fit and the
# default smoother with span 0.5.
ggplot(countries, aes(x = birth.rate, y = death.rate)) +
  geom_point(color = "black", alpha = 0.3) +
  geom_smooth(
    method = "lm", formula = y ~ poly(x, 1),
    se = FALSE, size = 2, color = "red4"
  ) +
  geom_smooth(
    method = "lm", formula = y ~ poly(x, 2),
    se = FALSE, size = 2, color = "red3"
  ) +
  geom_smooth(span = 0.5, se = FALSE, size = 2, color = "red1") +
  theme_bw()

Load: archivist::aread('pbiecek/Eseje/arepo/2be00d7306ba50c6030b66e30b3ebfec')

# Arrows: for each European country an arrow from its birth rate to its death
# rate. Countries are ordered by birth rate.
library(grid)
countries$country <- reorder(countries$country, countries$birth.rate, mean)
ggplot() +
  geom_segment(
    data = countries[countries$continent == "Europe", ],
    aes(
      x = country, xend = country,
      y = birth.rate, yend = death.rate
    ),
    arrow = arrow(length = unit(0.1, "cm"))
  ) +
  theme_bw() +
  coord_flip() +
  theme(legend.position = "none") +
  labs(x = "", y = "<---- more births                more deaths ---->")

Load: archivist::aread('pbiecek/Eseje/arepo/f788442f1871ac92089f5c116c333ee4')

# The same arrows coloured by direction: FALSE (births >= deaths) is green,
# TRUE (deaths > births) is red.
ggplot() +
  geom_segment(
    data = countries[countries$continent == "Europe", ],
    aes(
      x = country, xend = country,
      y = birth.rate, yend = death.rate,
      color = death.rate > birth.rate
    ),
    arrow = arrow(length = unit(0.1, "cm"))
  ) +
  theme_bw() +
  coord_flip() +
  theme(legend.position = "top") +
  labs(x = "", y = "<---- more births                more deaths ---->") +
  scale_color_manual(
    values = c("green3", "red3"),
    labels = c("More births than deaths", "More deaths than births"),
    name = ""
  )

Load: archivist::aread('pbiecek/Eseje/arepo/5e4e544b8273f536ff73aee592cf7987')

# The same arrows with line width mapped to population and closed arrow heads.
ggplot() +
  geom_segment(
    data = countries[countries$continent == "Europe", ],
    aes(
      x = country, xend = country,
      y = birth.rate, yend = death.rate,
      size = population
    ),
    arrow = arrow(length = unit(0.1, "cm"), type = "closed")
  ) +
  theme_bw() +
  coord_flip() +
  theme(legend.position = "none") +
  labs(x = "", y = "<---- more births                more deaths ---->")

Load: archivist::aread('pbiecek/Eseje/arepo/81dd454dc07cdf9b183763782adfffa5')

# The same arrows with the line type encoding the direction.
# The manual scale assigns values and labels in FALSE, TRUE order, so the
# mapped condition must be `death.rate > birth.rate`: FALSE (solid) is then
# "More births than deaths" and TRUE (dashed) "More deaths than births".
# With the reversed condition the two legend labels would be swapped.
ggplot() +
  geom_segment(
    data = countries[countries$continent == "Europe", ],
    aes(
      x = country, xend = country,
      y = birth.rate, yend = death.rate,
      linetype = death.rate > birth.rate
    ),
    arrow = arrow(length = unit(0.1, "cm"), type = "closed")
  ) +
  theme_bw() + coord_flip() + theme(legend.position = "top") +
  ylab("<---- more births                more deaths ---->") + xlab("") +
  scale_linetype_manual(
    values = c(1, 2),
    labels = c("More births than deaths", "More deaths than births"),
    name = ""
  )

Load: archivist::aread('pbiecek/Eseje/arepo/54932b1518bed31a4649d25d8e4a9e6b')

# Error bars: per continent the min, max and population-weighted mean of the
# birth (b*) and death (d*) rates, plus total population.
conts <- summarise(
  group_by(countries, continent),
  bmin = min(birth.rate, na.rm = TRUE),
  bmax = max(birth.rate, na.rm = TRUE),
  bmea = weighted.mean(birth.rate, w = population, na.rm = TRUE),
  dmin = min(death.rate, na.rm = TRUE),
  dmax = max(death.rate, na.rm = TRUE),
  dmea = weighted.mean(death.rate, w = population, na.rm = TRUE),
  population = sum(population, na.rm = TRUE)
)

# Weighted means as points with vertical min-max bars for the death rate.
# A horizontal counterpart, geom_errorbarh(width=0.5), is left disabled.
ggplot(
  conts,
  aes(
    x = bmea, y = dmea,
    ymin = dmin, ymax = dmax,
    xmin = bmin, xmax = bmax,
    color = continent
  )
) +
  geom_point() +
  geom_errorbar(width = 0.5) +
  theme_bw() +
  labs(x = "birth.rate", y = "death.rate") +
  theme(legend.position = "none")

Load: archivist::aread('pbiecek/Eseje/arepo/b69f97739b3ffebe2e95f74b03708fcc')

Other geoms

# Hexagonal binning of the birth / death rate plane (9 bins), shaded from
# white to black.
# geomBinHex.pdf
ggplot(
  countries,
  aes(x = birth.rate, y = death.rate)
) +
  stat_binhex(bins = 9) +
  scale_fill_gradient(low = "white", high = "black") +
  theme_bw()

Load: archivist::aread('pbiecek/Eseje/arepo/de9105767dc331bfadbcfb682404bffe')

# Rug marks only, for both variables.
# geomRug.pdf
ggplot(
  countries,
  aes(x = birth.rate, y = death.rate)
) +
  geom_rug() +
  theme_bw()

Load: archivist::aread('pbiecek/Eseje/arepo/186ed12a13fb00167bc17bd931448c00')

# Redefine `continents` as unweighted mean rates per continent.
continents <- summarise(
  group_by(countries, continent),
  birth.rate = mean(birth.rate, na.rm = TRUE),
  death.rate = mean(death.rate, na.rm = TRUE)
)

# Continent names drawn as text at their mean rates.
# geomText.pdf
ggplot(
  continents,
  aes(x = birth.rate, y = death.rate, label = continent)
) +
  geom_text(alpha = 1) +
  theme_bw() +
  xlim(8, 35)

Load: archivist::aread('pbiecek/Eseje/arepo/b93859f2651718d470f769fe8a33533d')

Stats

# Box plot of birth rate per continent; whiskers extend up to 3 x IQR.
ggplot(
  countries,
  aes(x = continent, y = birth.rate)
) +
  geom_boxplot(coef = 3, fill = "grey") +
  theme_bw()

Load: archivist::aread('pbiecek/Eseje/arepo/d4a4c70500b2fa430be32553a3ed7fa0')

# Grey jittered points with a crossbar from the "mean_cl_boot" summary.
ggplot(countries, aes(x = continent, y = birth.rate)) +
  geom_jitter(
    position = position_jitter(width = 0.25),
    color = "grey"
  ) +
  stat_summary(
    fun.data = "mean_cl_boot",
    geom = "crossbar",
    width = 0.3
  ) +
  theme_bw()

Load: archivist::aread('pbiecek/Eseje/arepo/8a1114de5c30b4960c8384c8084aa0bd')

# arrows
# Per-continent birth-rate quartiles reshaped to long form and drawn as a
# double-headed path over grey jittered points.
library(tidyr)
library(grid)

# NOTE(review): `%a%` is not defined in this file; presumably it is the
# archiving pipe from the archivist package loaded at the top — confirm.
countries %a%
  group_by(continent) %a%
  summarise(q1 = quantile(birth.rate, 0.25, na.rm=TRUE),
            q2 = quantile(birth.rate, 0.5, na.rm=TRUE),
            q3 = quantile(birth.rate, 0.75, na.rm=TRUE)) %a%
  gather(key, value, -continent) %a%
  ggplot(aes(x=continent, y=value, group=continent)) +
  geom_jitter(data=countries, aes(x = continent, y = birth.rate), position=position_jitter(width=0.25), color="grey") + 
  geom_path(arrow=arrow(ends = "both"), size=2)  +
  theme_bw() + xlab("") + ylab("Quantiles")

Load: archivist::aread('pbiecek/Eseje/arepo/2b6dea731c66a777cff2f9f5664397c8')

# Arrows: per-continent birth-rate quartiles in wide (cq) and long (cq13) form.
cq <- summarise(
  group_by(countries, continent),
  q1 = quantile(birth.rate, 0.25, na.rm = TRUE),
  q2 = quantile(birth.rate, 0.5, na.rm = TRUE),
  q3 = quantile(birth.rate, 0.75, na.rm = TRUE)
)
cq13 <- gather(cq, key, value, -continent)

# statQ1.pdf
# Double-headed path through the quartiles with a large point at the median,
# over grey jittered observations.
ggplot(cq13, aes(x = continent, y = value, group = continent)) +
  geom_jitter(
    data = countries,
    aes(x = continent, y = birth.rate),
    position = position_jitter(width = 0.25),
    color = "grey"
  ) +
  geom_path(arrow = arrow(ends = "both"), size = 2) +
  geom_point(
    data = cq13[cq13$key == "q2", ],
    aes(x = continent, y = value),
    size = 5
  ) +
  theme_bw() +
  labs(x = "", y = "Kwartyle i mediana dla wsp. urodzin")

Load: archivist::aread('pbiecek/Eseje/arepo/b4fa78b4ca09f5cc9206ac2d8949f4bb')

# statQ2.pdf
# Box drawn directly from the precomputed quartiles (whiskers collapsed onto
# the box), with grey jittered observations on top.
ggplot(cq, aes(x = continent, y = q2)) +
  geom_boxplot(
    aes(ymin = q1, lower = q1, middle = q2, upper = q3, ymax = q3),
    stat = "identity"
  ) +
  geom_jitter(
    data = countries,
    aes(x = continent, y = birth.rate),
    position = position_jitter(width = 0.25),
    color = "grey"
  ) +
  theme_bw() +
  labs(x = "", y = "Kwartyle i mediana dla wsp. urodzin")

Load: archivist::aread('pbiecek/Eseje/arepo/81d8abd99a12a35f280cf4c413192423')

# statQ3.pdf
# Error bar from Q1 to Q3 with a large "*" at the median, over grey jittered
# observations.
ggplot(cq, aes(x = continent, y = q2)) +
  geom_jitter(
    data = countries,
    aes(x = continent, y = birth.rate),
    position = position_jitter(width = 0.25),
    color = "grey"
  ) +
  geom_errorbar(
    aes(ymin = q1, ymax = q3),
    stat = "identity", width = 0.3, size = 2
  ) +
  geom_text(label = "*", size = 25) +
  theme_bw() +
  labs(x = "", y = "Kwartyle i mediana dla wsp. urodzin")

Load: archivist::aread('pbiecek/Eseje/arepo/f00dfaa80fa08af4864d648be7f06905')

# 2D kernel density shown as grey contour lines (bandwidth 10 on both axes).
ggplot(countries, aes(x = birth.rate, y = death.rate)) +
  coord_fixed() +
  xlim(0, 50) +
  ylim(0, 16) +
  geom_density2d(h = c(10, 10), color = "grey") +
  theme_bw()

Load: archivist::aread('pbiecek/Eseje/arepo/f50857ab0fcabe83cf8559bb0dd5d2d6')

# The same density drawn as semi-transparent polygons.
ggplot(countries, aes(x = birth.rate, y = death.rate)) +
  coord_fixed() +
  xlim(0, 50) +
  ylim(0, 16) +
  stat_density2d(
    h = c(10, 10),
    geom = "polygon",
    alpha = 0.25
  ) +
  theme_bw()

Load: archivist::aread('pbiecek/Eseje/arepo/de437e1db2cc7be55dfa4f5fdc27bfab')

# The same density drawn as tiles shaded from white to black.
ggplot(countries, aes(x = birth.rate, y = death.rate)) +
  coord_fixed() +
  xlim(0, 50) +
  ylim(0, 16) +
  stat_density2d(
    h = c(10, 10),
    geom = "tile",
    aes(fill = ..density..),
    contour = FALSE
  ) +
  scale_fill_gradient(low = "white", high = "black") +
  theme_minimal() +
  theme(legend.position = "top")

Load: archivist::aread('pbiecek/Eseje/arepo/fb2444c1e2eea5a35d7b4cc2b679b5df')

# The same density on a 50 x 15 grid, drawn as points sized by density.
ggplot(countries, aes(x = birth.rate, y = death.rate)) +
  coord_fixed() +
  xlim(0, 50) +
  ylim(0, 16) +
  stat_density2d(
    h = c(10, 10),
    n = c(50, 15),
    geom = "point",
    aes(size = ..density..),
    contour = FALSE
  ) +
  scale_size_continuous(range = c(0, 4)) +
  theme_minimal() +
  theme(legend.position = "top")

Load: archivist::aread('pbiecek/Eseje/arepo/e7827e5bc77332bb87511e4bf92fe389')

Position modifiers

# One row per student with a column per subject, then a cross-table of the
# rank-based quarter each student falls into for maths and Polish.
maturaExam2 <- spread(maturaExam, przedmiot, punkty)
tab <- table(
  matematyka = cut(
    rank(maturaExam2$matematyka) / nrow(maturaExam2),
    c(0, 0.25, 0.5, 0.75, 1),
    labels = c("matematyka <25%", "matematyka 25%-50%", "matematyka 50%-75%", "matematyka >75%")
  ),
  polski = cut(
    rank(maturaExam2[, "j. polski"]) / nrow(maturaExam2),
    c(0, 0.25, 0.5, 0.75, 1),
    labels = c("polski <25%", "polski 25%-50%", "polski 50%-75%", "polski >75%")
  )
)

# Long form of the table; its three column names feed aes_string().
df <- data.frame(tab)
dfn <- colnames(df)

# position = "fill": bars rescaled to 100%.
ggplot(df, aes_string(x = dfn[1], fill = dfn[2], y = dfn[3])) +
  geom_bar(stat = "identity", position = "fill") +
  theme_bw() +
  scale_y_continuous(labels = percent) +
  scale_fill_brewer(type = "div") +
  labs(x = "", y = "") +
  theme(
    legend.position = "none",
    axis.text.x = element_text(angle = 90, vjust = 0.5)
  ) +
  ggtitle("position_fill")

Load: archivist::aread('pbiecek/Eseje/arepo/a87f5bccf9d805ae4e68824ed64457b9')

# position = "stack": counts stacked on top of each other.
ggplot(df, aes_string(x = dfn[1], fill = dfn[2], y = dfn[3])) +
  geom_bar(stat = "identity", position = "stack") +
  theme_bw() +
  scale_fill_brewer(type = "div") +
  labs(x = "", y = "") +
  theme(
    legend.position = "none",
    axis.text.x = element_text(angle = 90, vjust = 0.5)
  ) +
  ggtitle("position_stack")

Load: archivist::aread('pbiecek/Eseje/arepo/370fc55aee7b86924e8080ffd6741304')

# position = "dodge": bars placed side by side.
ggplot(df, aes_string(x = dfn[1], fill = dfn[2], y = dfn[3])) +
  geom_bar(stat = "identity", position = "dodge") +
  theme_bw() +
  scale_fill_brewer(type = "div") +
  labs(x = "", y = "") +
  theme(
    legend.position = "none",
    axis.text.x = element_text(angle = 90, vjust = 0.5)
  ) +
  ggtitle("position_dodge")

Load: archivist::aread('pbiecek/Eseje/arepo/0b2ac5977bb6dbc883a2e7edf2e830e7')

# Plain points (identity position): birth rate per continent.
# Disabled option: theme(axis.text.x = element_text(angle=90, vjust=0.5))
ggplot(
  countries,
  aes(x = continent, y = birth.rate)
) +
  geom_point() +
  theme_bw() +
  xlab("") +
  ggtitle("position_identity")

Load: archivist::aread('pbiecek/Eseje/arepo/a807f654e30ef4a95feec6a8ff9c1b7d')

# Dot plot stacking of the same data.
# Disabled option: theme(axis.text.x = element_text(angle=90, vjust=0.5))
ggplot(
  countries,
  aes(x = continent, y = birth.rate)
) +
  geom_dotplot(
    binaxis = "y",
    stackdir = "center",
    binwidth = 0.7
  ) +
  theme_bw() +
  xlab("") +
  ggtitle("position_dotplot")

Load: archivist::aread('pbiecek/Eseje/arepo/30babc6c1a8444929f7ccf6932adf13f')

# Jittered points of the same data (jitter amount 0.35).
# Disabled option: theme(axis.text.x = element_text(angle=90, vjust=0.5))
ggplot(
  countries,
  aes(x = continent, y = birth.rate)
) +
  geom_jitter(position = position_jitter(0.35)) +
  theme_bw() +
  xlab("") +
  ggtitle("position_jitter")

Load: archivist::aread('pbiecek/Eseje/arepo/ddab839814295b3c772ba8634cf1c859')

Coordinate system

# Base scatterplot with a smoother, reused below with different coordinate
# systems. `se = FALSE` is spelled out because `F` is an ordinary variable
# that can be reassigned.
ddplot <- ggplot(countries, aes(x = birth.rate, y = death.rate)) +
  geom_point(size = 2) +
  geom_smooth(size = 2, se = FALSE) +
  theme_bw() +
  theme(legend.position = "none", text = element_text(size = 20))

# Default Cartesian coordinates.
ddplot

Load: archivist::aread('pbiecek/Eseje/arepo/686fa982ddb85b018ba5f6500e8374da')

# Same plot with a fixed aspect ratio between the axes.
ddplot + coord_fixed()

Load: archivist::aread('pbiecek/Eseje/arepo/fe3441a5b36533e5fb08f96fad47d5ff')

# Same plot in polar coordinates.
ddplot + coord_polar()

Load: archivist::aread('pbiecek/Eseje/arepo/6bda1a221c9043f24d29fcdd28af41db')

# Same plot with a log2-transformed x axis and a square-root y axis.
ddplot + coord_trans(x = "log2", y = "sqrt")

Load: archivist::aread('pbiecek/Eseje/arepo/7166fdb34b8e490ff67d0c61e772750f')

# Same plot with the x and y axes swapped.
ddplot + coord_flip()

Load: archivist::aread('pbiecek/Eseje/arepo/fede9877a22fa35acc83877cfcadd44f')

# Maps
map.world <- map_data(map = "world")

# `country` was turned into a factor by the earlier reorder() call. Writing a
# string that is not an existing level into a factor yields NA, so convert the
# column to character before renaming.
countries$country <- as.character(countries$country)

# Rename three countries so they match the `region` column used in the merge.
# NOTE(review): rows are addressed by position — confirm that rows 177, 135
# and 76 are the United Kingdom, Russia and Iran in this version of the data.
countries[177, "country"] <- "UK"
countries[135, "country"] <- "Russia"
countries[76, "country"] <- "Iran"

# Attach the country data to the map polygons and restore the drawing order.
map2 <- merge(map.world, countries, by.x = "region", by.y = "country")
map3 <- map2[order(map2$order), ]

# Choropleth of the birth rate, restricted to longitudes -25..50 and
# latitudes 10..70.
# NOTE(review): theme_bw() and theme_map() are added after the first theme()
# call and may override legend.position and the text size — confirm intent.
gg <- ggplot()
gg <- gg + theme(legend.position = "none")
gg <- gg + geom_map(
  data = map3, map = map3, color = "grey",
  aes(map_id = region, x = long, y = lat, fill = birth.rate)
)
gg <- gg +
  scale_fill_gradient(low = "yellow", high = "blue4", guide = "colourbar") +
  theme_bw() + ylim(10, 70) + xlim(-25, 50) +
  theme(text = element_text(size = 20)) + theme_map()

# Mollweide projection.
gg + coord_map("mollweide")

Load: archivist::aread('pbiecek/Eseje/arepo/5461834ee06cee459d758019c2a95712')

# Same map in an orthographic projection with orientation c(60, 20, -10).
gg + coord_map("ortho", orientation=c(60,20,-10)) 

Load: archivist::aread('pbiecek/Eseje/arepo/a3318105496464eefa4133e5f454fb7c')

# Same map with coord_map()'s default projection.
gg+ coord_map()

Load: archivist::aread('pbiecek/Eseje/arepo/c182dae4bc843e6fc8ffe91847a0b0ff')

Scales

# scale_fill_brewer: one strip of 8 coloured bars per palette index (1-8) for
# the qualitative, diverging and sequential palette families.
df <- data.frame(a = factor(1:8))

pl1 <- list()
pl2 <- list()
pl3 <- list()
for (i in 1:8) {
  # Qualitative palette i on a plot stripped of axes, lines and margins.
  pl1[[i]] <- ggplot(df, aes(x = a, fill = a)) +
    geom_bar() +
    scale_fill_brewer(type = "qual", palette = i) +
    theme(
      axis.title.x = element_text(size = 0, color = "black"),
      axis.title.y = element_text(size = 0, color = "black"),
      axis.text.y = element_text(size = 0, color = "black"),
      axis.text.x = element_text(size = 0, color = "black"),
      line = element_blank(),
      rect = element_blank(),
      plot.margin = grid::unit(c(0, 0, 0, 0), "lines"),
      complete = TRUE,
      legend.position = "none"
    ) +
    ggtitle(paste("type = qual, palette =", i, ""))
  # Same plot with the fill scale swapped for the diverging / sequential one.
  pl2[[i]] <- pl1[[i]] +
    scale_fill_brewer(type = "div", palette = i) +
    ggtitle(paste("type = div, palette =", i, ""))
  pl3[[i]] <- pl1[[i]] +
    scale_fill_brewer(type = "seq", palette = i) +
    ggtitle(paste("type = seq, palette =", i, ""))
}

# Combined lists, built before the layout entries are added to pl1-pl3:
# `pl` holds all 24 plots family by family, `pl4` groups them by palette
# index (qual, div, seq for index 1, then index 2, ...).
pl <- c(pl1, pl2, pl3)
pl4 <- sapply(1:8, function(i) list(pl1[[i]], pl2[[i]], pl3[[i]]))

# grid.arrange() layout arguments, stored as extra list entries for do.call().
pl$ncol <- 1
pl$nrow <- 24
pl4$ncol <- 3
pl4$nrow <- 8

pl1$ncol <- 1
pl1$nrow <- 8
pl2$ncol <- 1
pl2$nrow <- 8
pl3$ncol <- 1
pl3$nrow <- 8

do.call(grid.arrange, pl1)

do.call(grid.arrange, pl2)

do.call(grid.arrange, pl3)

do.call(grid.arrange, pl)

do.call(grid.arrange, pl4)

scales_x

library(ggthemes)

# Base scatterplot (complete rows only) used to compare position scales.
plD <- ggplot(na.omit(countries), aes(x = birth.rate, y = death.rate)) +
  geom_point(size = 2, color = "black") +
  theme_bw()

pl <- list()

pl[[1]] <- plD + ggtitle("default")
# Slot 2 keeps the untransformed plot; the tufte scales are disabled:
# pl[[2]] <- plD + scale_x_tufte() + scale_y_tufte() + ggtitle("_tufte")
pl[[2]] <- plD
pl[[3]] <- plD + scale_x_sqrt() + scale_y_sqrt() + ggtitle("_sqrt")
pl[[4]] <- plD + scale_x_reverse() + scale_y_reverse() + ggtitle("_reverse")
pl[[5]] <- plD + scale_x_log10() + scale_y_log10() + ggtitle("_log10")
pl[[6]] <- plD +
  scale_x_continuous(trans = "log2") +
  scale_y_continuous(trans = "log2") +
  ggtitle("_log2")

# Print every plot for its side effect only. A bare sapply(pl, print) at top
# level would also print the value it returns (a summary matrix of the plot
# objects) into the document.
invisible(lapply(pl, print))

Load: archivist::aread('pbiecek/Eseje/arepo/06f89542c3f70d65a115d3166248ae18') Load: archivist::aread('pbiecek/Eseje/arepo/4d6eae26bf75ad3179b8f74ff8ba2942') Load: archivist::aread('pbiecek/Eseje/arepo/18c4290a115fdb95c1e5f73060a2030a') Load: archivist::aread('pbiecek/Eseje/arepo/90579f39567196aca54a59164292b32f') Load: archivist::aread('pbiecek/Eseje/arepo/7a0597a19409db0dbb05266747d08458') Load: archivist::aread('pbiecek/Eseje/arepo/c3c4fd69b501c52a63fce0f3400212a5') [,1] [,2] [,3] [,4] [,5] [,6]
data List,1 List,1 List,1 List,1 List,1 List,1 panel List,5 List,5 List,5 List,5 List,5 List,5 plot List,9 List,9 List,9 List,9 List,9 List,9

# Arrange all plots collected in `pl` on one page.
do.call(grid.arrange, pl)

shape

# Base scatterplot (complete rows only) with continent mapped to shape and
# colour, used to compare shape scales.
plD <- ggplot(
  na.omit(countries),
  aes(x = birth.rate, y = death.rate, shape = continent, color = continent)
) +
  geom_point(size = 5) +
  theme_bw()

pl <- list()

pl[[1]] <- plD + ggtitle("default")
pl[[2]] <- plD + scale_shape_stata() + ggtitle("_stata")
pl[[3]] <- plD + scale_shape_calc() + ggtitle("_calc")
pl[[4]] <- plD + scale_shape_tableau() + ggtitle("_tableau")
pl[[5]] <- plD + scale_shape_cleveland() + ggtitle("_cleveland")
pl[[6]] <- plD + scale_shape_manual(values = LETTERS) + ggtitle("_manual")

# Print every plot for its side effect only. A bare sapply(pl, print) at top
# level would also print the value it returns (a summary matrix of the plot
# objects) into the document.
invisible(lapply(pl, print))

Load: archivist::aread('pbiecek/Eseje/arepo/11ed6c45d03a4d32e681ee28df195dd0') Load: archivist::aread('pbiecek/Eseje/arepo/60cddd573dc87dd8232ca906157b01ec') Load: archivist::aread('pbiecek/Eseje/arepo/06b7d591b2156407834a5140cbbf03d6') Load: archivist::aread('pbiecek/Eseje/arepo/137915912980352e3fccf745eabcd2c8') Load: archivist::aread('pbiecek/Eseje/arepo/10bd7f9800c77904d68ce9f79635918b') Load: archivist::aread('pbiecek/Eseje/arepo/0053aec66b81e94f20a8bdf5a810ef5c') [,1] [,2] [,3] [,4] [,5] [,6]
data List,1 List,1 List,1 List,1 List,1 List,1 panel List,5 List,5 List,5 List,5 List,5 List,5 plot List,9 List,9 List,9 List,9 List,9 List,9

# Arrange all plots collected in `pl` on one page.
do.call(grid.arrange, pl)

Panels / facets

# Facets: one panel per continent. The grey background layer uses the data
# without the `continent` column, so it is not split by the facet variable and
# every panel shows all countries. The column is dropped by name rather than
# by position, so the plot does not depend on the column order.
plD <- ggplot(na.omit(countries), aes(x = birth.rate, y = death.rate)) +
  stat_ellipse(color = "red4") +
  geom_point(
    data = countries[, setdiff(names(countries), "continent")],
    size = 1, color = "grey"
  ) +
  geom_point(size = 2, color = "red") +
  theme_bw() + theme(legend.position = "none") +
  theme(text = element_text(size = 20)) +
  scale_x_sqrt() + scale_y_sqrt()

plD + facet_wrap(~continent)

Load: archivist::aread('pbiecek/Eseje/arepo/b27343887308add362245bcbcf6e89a5')

# Single-panel alternative: continents separated by colour, each with its own
# ellipse, on square-root axes.
ggplot(
  na.omit(countries),
  aes(x = birth.rate, y = death.rate, color = continent)
) +
  stat_ellipse() +
  geom_point(size = 2) +
  theme_bw() +
  theme(legend.position = "none") +
  theme(text = element_text(size = 20)) +
  scale_x_sqrt() +
  scale_y_sqrt()

Load: archivist::aread('pbiecek/Eseje/arepo/423d08e9ac74caa1621b5628ce3d4340')

Themes

# Base scatterplot (complete rows only, square-root x axis) used to compare
# themes.
plD <- ggplot(na.omit(countries), aes(x = birth.rate, y = death.rate)) +
  geom_point(size = 2, color = "black") +
  scale_x_sqrt()

# One plot per theme. Slot numbers are kept from the full list; the disabled
# entries leave NULL gaps in `pl`.
pl <- list()
pl[[1]] <- plD +   theme_bw() +  theme(legend.position="none") +
  theme(text=element_text(size=20)) + ggtitle("theme_bw")
#pl[[2]] <- plD # +   theme_calc() +  theme(legend.position="none") +
#  theme(text=element_text(size=20)) + ggtitle("theme_calc")
pl[[3]] <- plD +   theme_classic() +  theme(legend.position="none") +
  theme(text=element_text(size=20)) + ggtitle("theme_classic")
#pl[[4]] <- plD +   theme_economist() +  theme(legend.position="none") +
#  theme(text=element_text(size=20, margin = 0)) + ggtitle("theme_economist")
#pl[[5]] <- plD +   theme_economist_white() +  theme(legend.position="none") +
#  theme(text=element_text(size=20)) + ggtitle("theme_economist_white")
pl[[6]] <- plD +   theme_excel() +  theme(legend.position="none") +
  theme(text=element_text(size=20)) + ggtitle("theme_excel")
pl[[7]] <- plD +   theme_few() +  theme(legend.position="none") +
  theme(text=element_text(size=20)) + ggtitle("theme_few")
#pl[[8]] <- plD +   theme_fivethirtyeight() +  theme(legend.position="none") +
#  theme(text=element_text(size=20)) + ggtitle("theme_fivethirtyeight")
#pl[[9]] <- plD +   theme_foundation() +  theme(legend.position="none") +
#  theme(text=element_text(size=20)) + ggtitle("theme_foundation")
pl[[10]] <- plD +   theme_gray() +  theme(legend.position="none") +
  theme(text=element_text(size=20)) + ggtitle("theme_gray")
pl[[11]] <- plD +   theme_map() +  theme(legend.position="none") +
  theme(text=element_text(size=20)) + ggtitle("theme_map")
pl[[12]] <- plD +   theme_pander() +  theme(legend.position="none") +
  theme(text=element_text(size=20)) + ggtitle("theme_pander")
pl[[13]] <- plD +   theme_solarized() +  theme(legend.position="none") +
  theme(text=element_text(size=20)) + ggtitle("theme_solarized")
#pl[[14]] <- plD +   theme_stata() +  theme(legend.position="none") +
#  theme(text=element_text(size=20)) + ggtitle("theme_stata")
#pl[[15]] <- plD +   theme_wsj() +  theme(legend.position="none") +
#  theme(text=element_text(size=20)) + ggtitle("theme_wsj")
pl[[16]] <- plD +   theme_tufte() +  theme(legend.position="none") +
  theme(text=element_text(size=20)) + ggtitle("theme_tufte")
#pl[[17]] <- plD +   theme_solarized_2() +  theme(legend.position="none") +
#  theme(text=element_text(size=20)) + ggtitle("theme_solarized_2")

# Print only the slots that hold a plot: print(NULL) would write a literal
# "NULL" into the document for every gap.
tmp <- lapply(Filter(Negate(is.null), pl), print)

Load: archivist::aread('pbiecek/Eseje/arepo/723f9b3b123155407e7cdc02bbb85e91') NULL Load: archivist::aread('pbiecek/Eseje/arepo/7c931709f71387efc7df9fc6bc14dfff') NULL NULL Load: archivist::aread('pbiecek/Eseje/arepo/b7de8a1ced3b83131c38619579b60534') Load: archivist::aread('pbiecek/Eseje/arepo/4b8cca453f3d727e4e32f1884c889fba') NULL NULL Load: archivist::aread('pbiecek/Eseje/arepo/b7feb0ffdd575e3d0b8d3517d62bd504') Load: archivist::aread('pbiecek/Eseje/arepo/8c550765c9db09d1c8aef2db7eeeb33d') Load: archivist::aread('pbiecek/Eseje/arepo/11b1afaf43d8c10a3c70f531075f89ec') Load: archivist::aread('pbiecek/Eseje/arepo/a786c052cf83fc38141d65e8660e971b') NULL NULL Load: archivist::aread('pbiecek/Eseje/arepo/f25db15c8c98eae682755205dc247922')

#do.call(grid.arrange, pl[c(1,10,11,  8,17,14,  4,5, 6)])